{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "233ab97d4142251c",
   "metadata": {},
   "source": [
    "<a href=\"https://colab.research.google.com/github/run-llama/llama_index/blob/main/docs/examples/vector_stores/DashvectorIndexDemo-Hybrid.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c23092cd21d8ac40",
   "metadata": {},
   "source": [
    "# DashVector Vector Store - Hybrid Search"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "25bbda731abd4867",
   "metadata": {},
   "source": [
    "If you're opening this notebook on Colab, you will probably need to install LlamaIndex 🦙."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "16124e83e72cc4d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Install into the running kernel's environment (%pip rather than !pip3).\n",
    "# llama-index is held below 0.10 because this notebook uses the legacy\n",
    "# top-level import paths (`from llama_index import ...`), which moved to\n",
    "# `llama_index.core` in 0.10.\n",
    "%pip install \"llama-index<0.10\" dashvector dashtext"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "aa64ed302be62bdd",
   "metadata": {},
   "outputs": [],
   "source": [
    "import logging\n",
    "import sys\n",
    "import os\n",
    "\n",
    "# Route INFO-level logs to stdout through exactly one root handler.\n",
    "# force=True (Python 3.8+) replaces any handlers already attached to the root\n",
    "# logger; without it basicConfig is a silent no-op when handlers exist.\n",
    "# Adding a second StreamHandler on top of basicConfig's own handler printed\n",
    "# every record twice, so it is not added here.\n",
    "logging.basicConfig(stream=sys.stdout, level=logging.INFO, force=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f162aa57ee911afb",
   "metadata": {},
   "source": [
    "## Create a DashVector Collection"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "46f5818a0cd63e0a",
   "metadata": {},
   "outputs": [],
   "source": [
    "import dashvector\n",
    "\n",
    "# Credentials are read from the environment; a missing variable raises KeyError.\n",
    "api_key, endpoint = (\n",
    "    os.environ[\"DASHVECTOR_API_KEY\"],\n",
    "    os.environ[\"DASHVECTOR_ENDPOINT\"],\n",
    ")\n",
    "client = dashvector.Client(endpoint=endpoint, api_key=api_key)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "223efb94d5b33c78",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Optional cleanup: uncomment to delete an existing \"quickstart\" collection,\n",
    "# e.g. before re-running the collection-creation cell below.\n",
    "# client.delete(\"quickstart\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fa984bbe74675074",
   "metadata": {},
   "outputs": [],
   "source": [
    "client.create(\n",
    "    \"quickstart\",\n",
    "    # NOTE: sparse vectors are only supported with the dotproduct metric\n",
    "    metric=\"dotproduct\",\n",
    "    # embedding size of text-embedding-ada-002\n",
    "    dimension=1536,\n",
    ")\n",
    "\n",
    "# Handle to the collection, passed to DashVectorStore later on.\n",
    "dashvector_collection = client.get(\"quickstart\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "263c7726a6d876e3",
   "metadata": {},
   "source": [
    "## Download data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "58f99afe5597dd39",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fetch the Paul Graham essay into data/paul_graham/.\n",
    "!mkdir -p 'data/paul_graham/'\n",
    "!wget -O 'data/paul_graham/paul_graham_essay.txt' 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/examples/data/paul_graham/paul_graham_essay.txt'"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cb76284b62356e37",
   "metadata": {},
   "source": [
    "## Load documents"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9181bf4cabbd7655",
   "metadata": {},
   "outputs": [],
   "source": [
    "from llama_index import SimpleDirectoryReader\n",
    "\n",
    "# Read every file in the download directory into Document objects.\n",
    "documents = SimpleDirectoryReader(\n",
    "    input_dir=\"./data/paul_graham\"\n",
    ").load_data()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "63fca575edc30c4b",
   "metadata": {},
   "source": [
    "## Build the VectorStoreIndex with DashVectorStore"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5a8a1c4df35e423",
   "metadata": {},
   "outputs": [],
   "source": [
    "from llama_index import VectorStoreIndex\n",
    "from llama_index.storage.storage_context import StorageContext\n",
    "from llama_index.vector_stores import DashVectorStore\n",
    "\n",
    "# Wrap the DashVector collection as a LlamaIndex vector store.\n",
    "# NOTE(review): support_sparse_vector=True is presumably what the hybrid\n",
    "# query mode used later relies on - confirm against the DashVectorStore docs.\n",
    "vector_store = DashVectorStore(\n",
    "    support_sparse_vector=True,\n",
    "    collection=dashvector_collection,\n",
    ")\n",
    "\n",
    "# Build the index from the loaded documents on top of that store.\n",
    "storage_context = StorageContext.from_defaults(vector_store=vector_store)\n",
    "index = VectorStoreIndex.from_documents(\n",
    "    documents,\n",
    "    storage_context=storage_context,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "44309b4e075d9af5",
   "metadata": {},
   "source": [
    "## Query with Default Query Engine"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1eec068d4f7afc94",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Default engine: no vector_store_query_mode is passed here.\n",
    "query_engine = index.as_query_engine()\n",
    "response = query_engine.query(\n",
    "    \"What did the author do growing up?\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "aef42fe88cd925e6",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/markdown": [
       "<b>The author, growing up, worked on writing and programming outside of school. They wrote short stories and tried writing programs on an IBM 1401 computer. They later got a microcomputer and started programming more extensively, writing simple games and a word processor.</b>"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "from IPython.display import display, Markdown\n",
    "\n",
    "# Render the answer in bold through the Markdown rich display.\n",
    "display(\n",
    "    Markdown(f\"<b>{response}</b>\")\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "abd69099019bd286",
   "metadata": {},
   "source": [
    "## Query with Hybrid Query Mode"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "804a26b42e3bb983",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Same question as before, this time with the \"hybrid\" vector store query mode.\n",
    "query_engine = index.as_query_engine(\n",
    "    vector_store_query_mode=\"hybrid\",\n",
    ")\n",
    "response = query_engine.query(\n",
    "    \"What did the author do growing up?\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5a6620dc52961027",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/markdown": [
       "<b>The author wrote short stories and also worked on programming, specifically on an IBM 1401 computer in 9th grade.</b>"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Show the hybrid-mode answer in bold (Markdown/display were imported earlier).\n",
    "display(\n",
    "    Markdown(f\"<b>{response}</b>\")\n",
    ")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
